library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
library(purrr)
library(plm)
library(lmtest)
library(sandwich)
library(ordinal)
library(ggplot2)

##############
## Analysis ##
##############
pdata <- read.csv("scad_country_final.csv")

library(slider)
library(lubridate)

pdata <- pdata %>%
  mutate(startdate = as.Date(startdate)) %>%
  arrange(ccode, startdate) %>%
  group_by(ccode) %>%
  mutate(
    n_events_recent365 = slide_index_int(
      .x = startdate,    
      .i = startdate,    
      .f = ~length(.x),   
      .before = days(365),  
      .complete = FALSE
    )
  ) %>%
  ungroup()

pdata <- pdata.frame(pdata, index = c("ccode", "startdate"))

pdata$participants_lead <- factor(pdata$participants_lead, ordered = TRUE)

pdata_clmm <- pdata %>%
  filter(!is.na(participants_max),
         !is.na(fatality_diff_signed_weighted),
         !is.na(v2x_polyarchy),
         !is.na(v2xel_locelec),
         !is.na(gdppc),
         !is.na(pop))

poly_terms <- poly(pdata_clmm$fatality_diff_signed_weighted, 2)
pdata_clmm$poly1 <- poly_terms[, 1]
pdata_clmm$poly2 <- poly_terms[, 2]

m_clmm_quad <- clmm(as.factor(participants_max) ~ poly1 + poly2 + n_events_recent365 +
                      v2x_polyarchy + v2xel_locelec + gdppc + pop +
                      (1 | ccode),
                    data = pdata_clmm,
                    Hess = TRUE)

coefs <- coef(m_clmm_quad)
vcov_mat <- vcov(m_clmm_quad)

scad_result1_country <- m_clmm_quad

x_seq <- seq(min(pdata_clmm$fatality_diff_signed_weighted, na.rm = TRUE),
             max(pdata_clmm$fatality_diff_signed_weighted, na.rm = TRUE),
             length.out = 100)

poly_deriv <- predict(poly_terms, newdata = x_seq, deriv = 1)

grad_matrix <- cbind(poly_deriv[, 1], poly_deriv[, 2])
colnames(grad_matrix) <- c("poly1", "poly2")

marginal_effect <- grad_matrix[, 1] * coefs["poly1"] +
  grad_matrix[, 2] * coefs["poly2"]

marginal_se <- apply(grad_matrix, 1, function(g) {
  sqrt(t(g) %*% vcov_mat[c("poly1", "poly2"), c("poly1", "poly2")] %*% g)
})

prediction_data <- data.frame(
  x = x_seq,
  marginal_effect = marginal_effect,
  ci_low = marginal_effect - 1.96 * marginal_se,
  ci_high = marginal_effect + 1.96 * marginal_se
)

scad_marginal1_country <- ggplot(prediction_data, aes(x = x, y = marginal_effect)) +
  geom_line(color = "black", size = 0.5) +
  geom_ribbon(aes(ymin = ci_low, ymax = ci_high), fill = "black", alpha = 0.3) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Marginal Effect of Repression Deviations\n(SCAD: Country)",
    x = "Repression Deviations",
    y = "Marginal Effect on Protest Participation"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    axis.title.x = element_text(size = 15, margin = margin(t = 15)),
    axis.title.y = element_text(size = 15, margin = margin(r = 15)),
    axis.text = element_text(size = 15)
  )

print(scad_marginal1_country)

pdata_clmm <- pdata %>%
  filter(!is.na(participants_max),
         !is.na(ndeath_log),
         !is.na(v2x_polyarchy),
         !is.na(v2xel_locelec),
         !is.na(gdppc),
         !is.na(pop))

poly_terms <- poly(pdata_clmm$ndeath_log, 2)
pdata_clmm$poly1 <- poly_terms[, 1]
pdata_clmm$poly2 <- poly_terms[, 2]

m_clmm_quad <- clmm(as.factor(participants_max) ~ poly1 + poly2 + n_events_recent365 +
                      v2x_polyarchy + v2xel_locelec + gdppc + pop +
                      (1 | ccode),
                    data = pdata_clmm,
                    Hess = TRUE)

scad_country <- m_clmm_quad

coefs <- coef(m_clmm_quad)
vcov_mat <- vcov(m_clmm_quad)

x_seq <- seq(min(pdata_clmm$ndeath_log, na.rm = TRUE),
             max(pdata_clmm$ndeath_log, na.rm = TRUE),
             length.out = 100)

poly_deriv <- predict(poly_terms, newdata = x_seq, deriv = 1)

grad_matrix <- cbind(poly_deriv[, 1], poly_deriv[, 2])
colnames(grad_matrix) <- c("poly1", "poly2")

marginal_effect <- grad_matrix[, 1] * coefs["poly1"] +
  grad_matrix[, 2] * coefs["poly2"]

marginal_se <- apply(grad_matrix, 1, function(g) {
  sqrt(t(g) %*% vcov_mat[c("poly1", "poly2"), c("poly1", "poly2")] %*% g)
})

prediction_data <- data.frame(
  ndeath_log = x_seq,
  marginal_effect = marginal_effect,
  ci_low = marginal_effect - 1.96 * marginal_se,
  ci_high = marginal_effect + 1.96 * marginal_se
)

scad_marginal2_country <- ggplot(prediction_data, aes(x = ndeath_log, y = marginal_effect)) +
  geom_line(color = "black", size = 0.5) +
  geom_ribbon(aes(ymin = ci_low, ymax = ci_high), fill = "black", alpha = 0.3) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Marginal Effect of Simple Repression\n(SCAD: Country)",
    x = "Fatalities",
    y = "Marginal Effect on Protest Participation"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    axis.title.x = element_text(size = 15, margin = margin(t = 15)),
    axis.title.y = element_text(size = 15, margin = margin(r = 15)),
    axis.text = element_text(size = 15)
  )

print(scad_marginal2_country)

# Step 1: Prepare data
pdata_clmm_exp <- pdata %>%
  filter(!is.na(participants_max),
         !is.na(fatality_diff_signed_weighted_exp),
         !is.na(v2x_polyarchy),
         !is.na(v2xel_locelec),
         !is.na(gdppc),
         !is.na(pop))

# Step 2: Fit quadratic clmm model with exponential variable
poly_terms_exp <- poly(pdata_clmm_exp$fatality_diff_signed_weighted_exp, 2)
pdata_clmm_exp$poly1 <- poly_terms_exp[, 1]
pdata_clmm_exp$poly2 <- poly_terms_exp[, 2]

m_clmm_quad_exp <- clmm(as.factor(participants_max) ~ poly1 + poly2 + n_events_recent365 +
                          v2x_polyarchy + v2xel_locelec + gdppc + pop +
                          (1 | ccode),
                        data = pdata_clmm_exp,
                        Hess = TRUE)

# Step 3: Prepare prediction data
coefs_exp <- coef(m_clmm_quad_exp)
vcov_mat_exp <- vcov(m_clmm_quad_exp)

scad_result1_country_exp <- m_clmm_quad_exp

x_seq_exp <- seq(min(pdata_clmm_exp$fatality_diff_signed_weighted_exp, na.rm = TRUE),
                 max(pdata_clmm_exp$fatality_diff_signed_weighted_exp, na.rm = TRUE),
                 length.out = 100)

poly_deriv_exp <- predict(poly_terms_exp, newdata = x_seq_exp, deriv = 1)

grad_matrix_exp <- cbind(poly_deriv_exp[, 1], poly_deriv_exp[, 2])
colnames(grad_matrix_exp) <- c("poly1", "poly2")

marginal_effect_exp <- grad_matrix_exp[, 1] * coefs_exp["poly1"] +
  grad_matrix_exp[, 2] * coefs_exp["poly2"]

marginal_se_exp <- apply(grad_matrix_exp, 1, function(g) {
  sqrt(t(g) %*% vcov_mat_exp[c("poly1", "poly2"), c("poly1", "poly2")] %*% g)
})

prediction_data_exp <- data.frame(
  x = x_seq_exp,
  marginal_effect = marginal_effect_exp,
  ci_low = marginal_effect_exp - 1.96 * marginal_se_exp,
  ci_high = marginal_effect_exp + 1.96 * marginal_se_exp
)

# Step 4: Plot
scad_marginal1_country_exp <- ggplot(prediction_data_exp, aes(x = x, y = marginal_effect)) +
  geom_line(color = "black", size = 0.5) +
  geom_ribbon(aes(ymin = ci_low, ymax = ci_high), fill = "black", alpha = 0.3) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Marginal Effect of Repression Deviations\n(SCAD: Country, Exponential Decay)",
    x = "Repression Deviations (Exponential Decay)",
    y = "Marginal Effect on Protest Participation"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    axis.title.x = element_text(size = 15, margin = margin(t = 15)),
    axis.title.y = element_text(size = 15, margin = margin(r = 15)),
    axis.text = element_text(size = 15)
  )

# Step 5: Print plot
print(scad_marginal1_country_exp)

library(dplyr)
library(psych)  
library(knitr)
library(kableExtra)

vars <- c("v2x_polyarchy", 
          "v2xel_locelec", "fatality_diff_signed_weighted", 
          "fatality_diff_signed_weighted_exp",
          "gdppc", "pop", "participants_max",
          "n_events_recent365", "ndeath_log")

descriptive_stats <- pdata_clmm %>%
  dplyr::select(all_of(vars)) %>%
  psych::describe() %>% 
  as.data.frame() %>% 
  dplyr::select(n, mean, sd, min, max)

descriptive_stats <- descriptive_stats %>%
  dplyr::select(everything())

kable(descriptive_stats, format = "latex", booktabs = TRUE,
      digits = 3, caption = "Descriptive Statistics for Key Variables") %>%
  kable_styling(latex_options = c("hold_position", "striped"))

